#====# Appendix D: Estimation window goodness-of-fit #====#

# Load libraries and set defaults ----
library(lemon)
library(tidyverse)
library(tidylog, warn.conflicts = FALSE)
source("aux/plot_theme.R")

# Import data ----
stocks <- read_rds("data_out/stocks_analysis.rds") # main analysis dataset
estim <- read_rds("estim/estimation_windows.rds") # results of the estimation window

# Extract GOF measures ----
win30_3folds <- estim$win30_3folds
win30_5folds <- estim$win30_5folds
win30_10folds <- estim$win30_10folds
win30_15folds <- estim$win30_15folds
win30_ols <- estim$win30_ols

win90_3folds <- estim$win90_3folds
win90_5folds <- estim$win90_5folds
win90_10folds <- estim$win90_10folds
win90_15folds <- estim$win90_15folds
win90_ols <- estim$win90_ols

win180_3folds <- estim$win180_3folds
win180_5folds <- estim$win180_5folds
win180_10folds <- estim$win180_10folds
win180_15folds <- estim$win180_15folds
win180_ols <- estim$win180_ols

fit <- map(.x = 1:length(win30_3folds),
           .f = ~win30_3folds[[.x]]$fit) %>%
  bind_rows() %>%
  mutate(estim_beg = -30,
         estim_end = -5L,
         n_folds = 3) %>%
  rbind(map(.x = 1:length(win90_3folds),
            .f = ~win90_3folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -90,
                 estim_end = -5L,
                 n_folds = 3)) %>%
  rbind(map(.x = 1:length(win180_3folds),
            .f = ~win180_3folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -180,
                 estim_end = -5L,
                 n_folds = 3)) %>%
  rbind(map(.x = 1:length(win30_5folds),
            .f = ~win30_5folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -30,
                 estim_end = -5L,
                 n_folds = 5)) %>%
  rbind(map(.x = 1:length(win90_5folds),
            .f = ~win90_5folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -90,
                 estim_end = -5L,
                 n_folds = 5)) %>%
  rbind(map(.x = 1:length(win180_5folds),
            .f = ~win180_5folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -180,
                 estim_end = -5L,
                 n_folds = 5)) %>%
  rbind(map(.x = 1:length(win30_10folds),
            .f = ~win30_10folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -30,
                 estim_end = -5L,
                 n_folds = 10)) %>%
  rbind(map(.x = 1:length(win90_10folds),
            .f = ~win90_10folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -90,
                 estim_end = -5L,
                 n_folds = 10)) %>%
  rbind(map(.x = 1:length(win180_10folds),
            .f = ~win180_10folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -180,
                 estim_end = -5L,
                 n_folds = 10)) %>%
  rbind(map(.x = 1:length(win30_15folds),
            .f = ~win30_15folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -30,
                 estim_end = -5L,
                 n_folds = 15)) %>%
  rbind(map(.x = 1:length(win90_15folds),
            .f = ~win90_15folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -90,
                 estim_end = -5L,
                 n_folds = 15)) %>%
  rbind(map(.x = 1:length(win180_15folds),
            .f = ~win180_15folds[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -180,
                 estim_end = -5L,
                 n_folds = 15)) %>%
  mutate(type = "LASSO") %>%
  rbind(map(.x = 1:length(win30_ols),
            .f = ~win30_ols[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -30,
                 estim_end = -5L,
                 n_folds = 0,
                 type = "OLS")) %>%
  rbind(map(.x = 1:length(win90_ols),
            .f = ~win90_ols[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -90,
                 estim_end = -5L,
                 n_folds = 0,
                 type = "OLS")) %>%
  rbind(map(.x = 1:length(win180_ols),
            .f = ~win180_ols[[.x]]$fit) %>%
          bind_rows() %>%
          mutate(estim_beg = -180,
                 estim_end = -5L,
                 n_folds = 0,
                 type = "OLS")) %>%
  as_tibble() %>%
  mutate(estim_beg_fct = paste0(estim_beg, " days") %>%
           fct_relevel(c("-180 days", "-90 days")),
         n_folds_fct = ifelse(type == "LASSO", 
                              paste0("LASSO, CV with ", n_folds, " folds"),
                              paste0("OLS (no folds)")) %>%
           fct_relevel(c("OLS (no folds)", "LASSO, CV with 3 folds", "LASSO, CV with 5 folds"))) %>%
  mutate(FCPA_sample = as.numeric(ticker %in% stocks$ticker_symbol[stocks$FCPA_sample == 1]))

# Figure D.1: Distribution of R2 from market models of past FCPA targets ----
plot <- fit %>%
  filter(FCPA_sample == 1) %>%
  ggplot(aes(x = R2, col = estim_beg_fct, linetype = estim_beg_fct)) +
  geom_density(key_glyph = "path", fill = "black", alpha = .1) +
  geom_vline(data = fit %>%
               filter(FCPA_sample == 1) %>%
               filter(!is.infinite(R2)) %>%
               group_by(estim_beg_fct, n_folds_fct) %>%
               reframe(xintercept = mean(R2, na.rm = TRUE)),
             mapping = aes(xintercept = xintercept, col = estim_beg_fct, linetype = estim_beg_fct),
             show.legend = FALSE) +
  geom_label(data = fit %>%
               filter(FCPA_sample == 1) %>%
               filter(!is.infinite(R2)) %>%
               group_by(estim_beg_fct, n_folds_fct) %>%
               reframe(x = mean(R2, na.rm = TRUE)) %>%
               mutate(y = case_when(estim_beg_fct == "-30 days" ~ 5.5,
                                    estim_beg_fct == "-90 days" ~ 3.5,
                                    estim_beg_fct == "-180 days" ~ 1.5),
                      label = sprintf("%.3f", x)) %>%
               distinct(),
             mapping = aes(x = x, y = y, label = label),
             show.legend = FALSE,
             alpha = .8) +
  facet_rep_wrap(~ n_folds_fct, nrow = 2, repeat.tick.labels = "all") +
  ylab("Density") + xlab(expression("Market model " * R^2 * " (estimation window)")) +
  scale_linetype_manual("Estimation window start", values = c("dotted", "dashed", "solid")) +
  scale_color_manual("Estimation window start", values = c("grey50", "grey30", "black")) +
  coord_cartesian(ylim = c(0, 7)) +
  theme(legend.position = "bottom", legend.title = element_text(hjust = 0.5)) +
  guides(linetype = guide_legend(title.position = "top", label.position = "bottom", 
                                 override.aes = list(alpha = 1)))

reposition_legend(plot, "center", panel = "panel-3-2")
dev.copy("plots/figure_D1.pdf", device = pdf,
         width = 9, height = 7)
dev.off()

# Figure D.2: Distribution of R2 from market models of matched placebo ----
plot <- fit %>%
  filter(FCPA_sample == 0) %>%
  ggplot(aes(x = R2, col = estim_beg_fct, linetype = estim_beg_fct)) +
  geom_density(key_glyph = "path", fill = "black", alpha = .1) +
  geom_vline(data = fit %>%
               filter(FCPA_sample == 0) %>%
               filter(!is.infinite(R2)) %>%
               group_by(estim_beg_fct, n_folds_fct) %>%
               reframe(xintercept = mean(R2, na.rm = TRUE)),
             mapping = aes(xintercept = xintercept, col = estim_beg_fct, linetype = estim_beg_fct),
             show.legend = FALSE) +
  geom_label(data = fit %>%
               filter(FCPA_sample == 0) %>%
               filter(!is.infinite(R2)) %>%
               group_by(estim_beg_fct, n_folds_fct) %>%
               reframe(x = mean(R2, na.rm = TRUE)) %>%
               mutate(y = case_when(estim_beg_fct == "-30 days" ~ 5.5,
                                    estim_beg_fct == "-90 days" ~ 3.5,
                                    estim_beg_fct == "-180 days" ~ 1.5),
                      label = sprintf("%.3f", x)) %>%
               distinct(),
             mapping = aes(x = x, y = y, label = label),
             show.legend = FALSE,
             alpha = .8) +
  facet_rep_wrap(~ n_folds_fct, nrow = 2, repeat.tick.labels = "all") +
  coord_cartesian(ylim = c(0, 7)) +
  ylab("Density") + xlab(expression("Market model " * R^2 * " (estimation window)")) +
  scale_linetype_manual("Estimation window start", values = c("dotted", "dashed", "solid")) +
  scale_color_manual("Estimation window start", values = c("grey50", "grey30", "black")) +
  theme(legend.position = "bottom", legend.title = element_text(hjust = 0.5)) +
  guides(linetype = guide_legend(title.position = "top", label.position = "bottom", 
                                 override.aes = list(alpha = 1)))

reposition_legend(plot, "center", panel = "panel-3-2")
dev.copy("plots/figure_D2.pdf", device = pdf,
         width = 9, height = 7)
dev.off()

#====# The End #====#